# Purpose: Analyze polling station level data for full-scale IVR experiment
# Modified by: Mamoor Ali Khan
# Date Modified: Feb 26, 2025


# ----------
# Pre-amble
# ----------
# Clear data
rm(list = ls())

# Install and load pacman if not already done
if (!requireNamespace("pacman", quietly = TRUE)) {
   install.packages("pacman")
}


source("helpers.R")

# pacman::p_unload(all)
pacman::p_load(tidyverse, estimatr, here)




#--------
# Ingest data
# ----------

mpa <- read_csv("../1_data/2_mpa_data/02_mpas_with_treatment.csv")

psm <- read_csv("../1_data/3_ps_data/99_ps_master.csv") %>%
   mutate(
      ki_effort_sum = ki_improve_schools_any_sum_endline +
         ki_improve_roads_any_sum_endline +
         ki_improve_healthfacilities_any_sum_endline +
         ki_improve_employment_any_sum_endline +
         ki_improve_electricityinfrastructure_any_sum_endline +
         ki_improve_gasinfrastructure_any_sum_endline +
         ki_improve_drinkingwater_any_sum_endline +
         ki_improve_rubbish_any_sum_endline +
         ki_improve_security_any_sum_endline,
      ki_any_effort = ki_effort_sum > 0,
      matched_pair = paste0(pk_number, "_", matched_pair)
   ) %>%
   left_join(
      mpa %>%
         select(pa_id, party) %>%
         rename(pk_number = pa_id)
   ) %>%
   mutate(pti_party = ifelse(party == "PTI", "PTI", "Other"))

# Total voters 2018
median(psm$total_voters_2018, na.rm = TRUE)

# KI variables
table(psm$ki_status_drinkingwater)
table(psm$ki_status_drinkingwater_endline)
table(psm$ki_status_drinkingwater_scale_avg)
table(psm$ki_status_drinkingwater_scale_avg_endline)



ps_outcomes <- c(
   "mpa_share_mamoor_2018",
   "mpa_share_rcons_2018",
   "turnout_2018",
   "ki_effort_sum",
   "ki_any_effort",
   "ki_mpa_visit_june_avg_endline",
   "ki_improve_schools_any_sum_endline",
   "ki_improve_roads_any_sum_endline",
   "ki_improve_healthfacilities_any_sum_endline",
   "ki_improve_employment_any_sum_endline",
   "ki_improve_electricityinfrastructure_any_sum_endline",
   "ki_improve_gasinfrastructure_any_sum_endline",
   "ki_improve_drinkingwater_any_sum_endline",
   "ki_improve_rubbish_any_sum_endline",
   "ki_improve_security_any_sum_endline"
)

psm$ki_status_average <- psm %>%
   select(starts_with("ki_status")) %>%
   select(ends_with("scale_avg")) %>%
   as.matrix() %>%
   apply(., 1, function(x) mean(x, na.rm = TRUE))


lm_robust(ki_effort_sum ~ ki_number_schools_avg + ki_employment_level_scale_avg + ki_status_average, psm %>% filter(treated_ps == 0))
lm_robust(ki_any_effort ~ ki_number_schools_avg + ki_employment_level_scale_avg + ki_status_average, psm %>% filter(treated_ps == 0))
lm_robust(ki_mpa_visit_june_avg_endline ~ ki_number_schools_avg + ki_employment_level_scale_avg + ki_status_average, psm %>% filter(treated_ps == 0))

fit_ps_models <- function(dat, outcome_endline, outcome_baseline = NULL) {
   treatments <- c("treated_ps")

   out_dat <- map_dfr(treatments, ~ {
      base_level <- levels(as.factor(dat[[.x]]))[1]

      ctrl_dat <- dat[as.factor(dat[[.x]]) == base_level, ]

      # DIM
      dim_o <- tidy_m(lm_robust(as.formula(paste0(outcome_endline, " ~ ", .x)), dat))
      dim_o$model <- "DIM"

      # PAP
      pap_o <- tidy_m(lm_robust(as.formula(paste0(outcome_endline, " ~ ", .x, " + factor(pk_number)")), dat))
      pap_o$model <- "PAP"

      out_dat <- bind_rows(dim_o, pap_o)
      out_dat %>%
         mutate(
            treatment = .x,
            ctrl_mean = mean(ctrl_dat[[outcome_endline]], na.rm = TRUE),
            ctrl_sd = sd(ctrl_dat[[outcome_endline]], na.rm = TRUE)
         )
   })
   out_dat
}

# estimate results
ps_results <- map_dfr(ps_outcomes, ~ {
   fit_ps_models(dat = psm, outcome_endline = .x)
})

ps_labels <- c(
   "mpa_share_mamoor_2018" = "Incumbent MPA vote share",
   "mpa_share_rcons_2018" = "Incumbent MPA vote share (RCons)",
   "turnout_2018" = "Turnout share",
   "ki_effort_sum" = "N of domains where MPA made effort",
   "ki_any_effort" = "Any MPA effort (0/1)",
   "ki_mpa_visit_june_avg_endline" = "Any MPA visit in June (0/1)",
   "ki_improve_schools_any_sum_endline" = "Any MPA effort: schools (0/1)",
   "ki_improve_roads_any_sum_endline" = "Any MPA effort: roads (0/1)",
   "ki_improve_healthfacilities_any_sum_endline" = "Any MPA effort: health (0/1)",
   "ki_improve_employment_any_sum_endline" = "Any MPA effort: employment (0/1)",
   "ki_improve_electricityinfrastructure_any_sum_endline" = "Any MPA effort: electricity (0/1)",
   "ki_improve_gasinfrastructure_any_sum_endline" = "Any MPA effort: gas (0/1)",
   "ki_improve_drinkingwater_any_sum_endline" = "Any MPA effort: water (0/1)",
   "ki_improve_rubbish_any_sum_endline" = "Any MPA effort: rubbish (0/1)",
   "ki_improve_security_any_sum_endline" = "Any MPA effort: security (0/1)"
)

ps_treatment_labels <- list(
   "top_20_treated" = c("Treatment PS {P1} vs control PS {P0}"),
   "treated_ps" = c("Treatment PS vs. Control PS")
)

ps_treatment_labels_no_num <- list(
   "top_20_treated" = c("ITT"),
   "treated_ps" = c("ITT")
)

N1 <- lm_robust(mpa_share_mamoor_2018 ~ top_20_treated + factor(pk_number) + mpa_share_2013, psm)$nobs
N2 <- lm_robust(turnout_2018 ~ top_20_treated + factor(pk_number) + turnout_2013, data = psm)$nobs

ps_vote_results <- bind_rows(
   tidy(lm_robust(mpa_share_mamoor_2018 ~ top_20_treated + factor(pk_number) + mpa_share_2013, psm)) %>%
      mutate(
         ctrl_mean = mean(psm$mpa_share_mamoor_2018[psm$top_20_treated == 0], na.rm = TRUE),
         ctrl_sd = sd(psm$mpa_share_mamoor_2018[psm$top_20_treated == 0], na.rm = TRUE),
         N = N1
      ),
   tidy(lm_robust(turnout_2018 ~ top_20_treated + factor(pk_number) + turnout_2013, data = psm)) %>%
      mutate(
         ctrl_mean = mean(psm$turnout_2018[psm$top_20_treated == 0], na.rm = TRUE),
         ctrl_sd = sd(psm$turnout_2018[psm$top_20_treated == 0], na.rm = TRUE),
         N = N2
      )
) %>%
   mutate(
      model = "PAP",
      treatment = "top_20_treated"
   )

vote_tab <- prepare_results(
   outcomes = ps_outcomes[c(1, 3)],
   results = ps_vote_results,
   treatment_name = "top_20_treated",
   labels = ps_labels,
   treatment_labels = ps_treatment_labels,
   treatment_header = ""
)
vote_tab

vote_tab_new <- prepare_simple_results(
   outcomes = ps_outcomes[c(1, 3)],
   results = ps_vote_results,
   treatment_term = "top_20_treated",
   treatment_titles = c("ITT: treated PS"),
   treatment_group_titles = c("\\{P1\\} vs. \\{P0\\}"),
   control_group_title = c("\\{P0\\}"),
   control_title = c("Control mean: control PS"),
   labels = ps_labels
)
writeLines(vote_tab_new, con = "../3_output/2_tables/tab-K1.tex")


ki_tab_new <- prepare_simple_results(
   ps_outcomes[4:6],
   results = ps_results,
   treatment_term = "treated_ps",
   treatment_titles = c("ITT: treated PS"),
   treatment_group_titles = c("\\{P1\\} vs. \\{P0\\}"),
   control_group_title = c("\\{P0\\}"),
   control_title = c("Control mean: control PS"),
   labels = ps_labels
)
writeLines(ki_tab_new, con = "../3_output/2_tables/tab-J1.tex")




######
# END OF SCRIPT
#####

